This is our first program in python: It is just started here
print("Hello World")
Hello World
$a=b+c$
x = 3
%whos # what variable is saved so far
Variable Type Data/Info ---------------------------- x int 3
print(type(x))
<class 'int'>
x = 5.7
%whos
Variable Type Data/Info ----------------------------- x float 5.7
print(type(x))
<class 'float'>
abcd = 556.32
%whos
Variable Type Data/Info ----------------------------- abcd float 556.32 x float 5.7
a,b,c,d,f = 3,5,6.0,7.2,-3
%whos
Variable Type Data/Info ----------------------------- a int 3 abcd float 556.32 b int 5 c float 6.0 d float 7.2 f int -3 x float 5.7
del abcd
%whos
Variable Type Data/Info ----------------------------- a int 3 b int 5 c float 6.0 d float 7.2 f int -3 x float 5.7
print(abcd)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[13], line 1 ----> 1 print(abcd) NameError: name 'abcd' is not defined
c = 2+4j
print(type(c))
<class 'complex'>
s = "hellow how are you"
print(type(s))
<class 'str'>
%whos
Variable Type Data/Info ------------------------------- a int 3 b int 5 c complex (2+4j) d float 7.2 f int -3 s str hellow how are you x float 5.7
sumOfaAndb = a+b #variables name should give you the look and feel what the data has
print(sumOfaAndb)
8
type(sumOfaAndb)
int
type(a+d) #float is super set than int and Python follows the super set
float
v = ((a+d)**3)/4
print(v)
265.30199999999996
s1 = "hellow"
s2 = "world"
s = s1+s2
print(s)
hellowworld
10//3 #quotient
3
10/3
3.3333333333333335
_ # stores the result of the above expression
3.3333333333333335
3x = 5 #can a variable name start with a digit i.e. 3x? NO
Cell In[30], line 1 3x = 5 ^ SyntaxError: invalid decimal literal
@y = 4 #can't start a variable name with @
Cell In[31], line 1 @y = 4 ^ SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='?
*t=4 #can't start a variable name with * including special sign other then _
Cell In[32], line 1 *t=4 ^ SyntaxError: starred assignment target must be in a list or tuple
_e = 6 # not recommended
startingTimeOfTheCourse = 2.0
%whos
Variable Type Data/Info ---------------------------------------------- a int 3 b int 5 c complex (2+4j) d float 7.2 f int -3 s str hellowworld s1 str hellow s2 str world startingTimeOfTheCourse float 2.0 sumOfaAndb int 8 v float 265.30199999999996 x float 5.7
a = True
b = True
c = False
%whos
Variable Type Data/Info -------------------------------------------- a bool True b bool True c bool False d float 7.2 f int -3 s str hellowworld s1 str hellow s2 str world startingTimeOfTheCourse float 2.0 sumOfaAndb int 8 v float 265.30199999999996 x float 5.7
print(a and b)
print(a and c)
print(c and a)
True False False
d = a or c
print(d)
True
not(a)
False
not(b)
False
not(c)
True
t = not(d)
type(t)
bool
print(t)
False
not((a and b) or (c or d))
False
print(2<3)
True
c = 2<3
print(type(c))
print(c)
<class 'bool'> True
d = 3==4
print(d)
False
3==3.0
True
x = 4
y = 9
z = 8.3
r = -3
(x<y) and (z<y) or (r==x)
True
(r==x) and (x<y) or (z>y)
False
(True or False) and False # and first and then or
False
print((not(2!=3)and Ture)or(False and True))
False
print(round(4.556))
5
print(round(4.345))
4
print(round(4.556389,3))
4.556
divmod(22,10)
(2, 2)
G = divmod(34,9)
type(G)
tuple
print(G)
(3, 7)
G[0]
3
G[1]
7
34//9
3
34%9 #remainder
7
isinstance(3,int) # is object a type of sth?
True
isinstance(3.4,(float,int))
True
isinstance(2+3j,(int,float,str,complex))
True
pow(2,4) # same with 2**4
16
2**4
16
pow(2,4,7) #2**4%7
2
x = input("enter a number :")
enter a number :56
type(x)
str
x = int(x) #change the type of x as integer
type(x)
int
print(x-34)
22
a = float(input("Enter a real number :"))
Enter a real number :12.5
type(a)
float
b = float(input("Enter a real number : "))
Enter a real number : abc
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[94], line 1 ----> 1 b = float(input("Enter a real number : ")) ValueError: could not convert string to float: 'abc'
pow? #don't know how to use the function then add ?
Cell In[98], line 1 pow? #don't know how to use the function then add ? ^ SyntaxError: invalid syntax
help(input) #don't know how to use the function then use help()
Help on method raw_input in module ipykernel.kernelbase:
raw_input(prompt='') method of ipykernel.ipkernel.IPythonKernel instance
Forward raw_input to frontends
Raises
------
StdinNotImplementedError if active frontend doesn't support stdin.
a = int(input())
b = int(input())
if a>b:
print(a)
print("I am still inside if condition")
print("I am outside the if condition") # diff block which not depend on if condition/ regardless of the result, should print this
10 45 I am outside the if condition
a = int(input())
b = int(input())
if a>b:
print(a)
if b>a:
print(b)
22 4 22
a = int(input())
b = int(input())
if a>b:
print(a)
print("if part")
else:
print(b)
print("else part")
10 10 10 else part
a = 10
b = 10
if a==b:
print("Equal")
elif a>b:
print("A")
else:
print("B")
print("Not in if")
Equal Not in if
a = int(input("Enter Marks :"))
if a >= 85:
print("A Grade")
elif (a < 85) and (a >= 80): #write prenthesis makes it more readable
print("A- Grade")
elif (a < 80) and (a >= 75):
print("B Grade")
elif (a < 75) and (a >= 70):
print("B- Grade")
else:
print("Below Average")
Enter Marks :64 Below Average
#esle 안쓰고 else 사용하기
a = 3
if a>10:
print(">10")
elif not(a>10):
print("Else part")
Else part
a = int(input())
if a > 10:
print(">10")
print("Inside the top if")
if a > 20:
print(">20")
print("Inside the nested if")
if a>30:
print(">30")
print("inside the nested if of nested if")
else:
print("<=30")
print("inside the else part of nested if of nested if")
else:
print("<=20")
print("Inside the else part of nested if")
print("Outside all ifs")
25 >10 Inside the top if >20 Inside the nested if <=30 inside the else part of nested if of nested if Outside all ifs
#single line comment
""" Multi line comment
User will enter a floating point number let say 238.915.
Your task is to find out the integer portion before the point (in this case 238)
and then check if that integer portion is an even number or not
"""
x = float(input("Enter a real number :"))
y = round(x)
if x>0:
if y>x:
intPortion = y-1 #29.6
else:
intPortion = y
else:
if y<x:
intPortion = y+1
else:
intPortion = y
if intPortion%2 == 0:
print("Evne")
else:
print("Odd")
Enter a real number :-87.3 Odd
round(-9.3)
-9
round(-9.6)
-10
n = int(input())
i = 1
while (i < n):
print(i**2)
print("This is iteration number:", i)
i += 1 #i = i+1
print("Loop done")
5 1 This is iteration number: 1 4 This is iteration number: 2 9 This is iteration number: 3 16 This is iteration number: 4 Loop done
n = 10
i = 1
while True:
if i%9 == 0:
print("Inside if")
break
else:
print("Inside else")
i = i+1
print("done")
Inside else Inside else Inside else Inside else Inside else Inside else Inside else Inside else Inside if done
n = 10
i = 1
while True:
if i%9 != 0:
print("inside if")
i +=1
continue
print("something")
print("somethingelse")
break
print("done")
inside if inside if inside if inside if inside if inside if inside if inside if something somethingelse done
L = []
for i in range(0,10,2): #0 start/10 end/ 2 step size
print(i)
L.append(i**2)
print(L)
0 2 4 6 8 [0, 4, 16, 36, 64]
S = {"apple", 4.9, "cherry"}
i = 1
for x in S: #as long as the x in S
print(x)
i += 1
if i == 3:
break
else:
pass
else:
print("Loop terminates with success")
print("Out side the loop")
apple 4.9 Out side the loop
D = {"A":10, "B":-19, "C":"abc"}
for x in D:
print(x, D[x])
A 10 B -19 C abc
""" Given a list of numbers i.e. [1,2,4,-5,7,9,3,2], make another list
that contains all the items in sorted order from min to max. i.e. your
result will be another list like [-5,1,2,2,3,4,7,9]
"""
L = [1,2,4,-5,7,9,3,2]
for j in range(len(L)): #length of L
m = L[j]
idx = j
c = j
for i in range(j,len(L)):
if L[i]<m:
m = L[i]
idx = c
c += 1
tmp = L[j]
L[j] = m
L[idx] = tmp
print(L)
[-5, 1, 2, 2, 3, 4, 7, 9]
#Refer stackoverflow answers
L = [1,2,4,-5,7,9,3,2]
m = L[0]
idx = 0
for i in L:
if i<m:
m = i
idx += 1
print(idx, m)
8 -5
def printSuccess():
print("I am done")
print("send me another task")
printSuccess()
I am done send me another task
3+8
11
printSuccess()
I am done send me another task
def printSuccess2():
"""This funcion is doing nothing except printing a message.
That message is "hellow"
""" #this is doc string #use doc string whenever use functions
print("hellow")
help(printSuccess2)
Help on function printSuccess2 in module __main__:
printSuccess2()
This funcion is doing nothing except printing a message.
That message is "hellow"
printSuccess2()
hellow
def printMessage(msg):
"""The function prints the message supplied by the user
or prints that msg is not in the form of string"""
if isinstance(msg,str):
print(msg)
else:
print("Your input argument is not string")
print("Here is the type of what you have supplied :", type(msg))
help(printMessage)
Help on function printMessage in module __main__:
printMessage(msg)
The function prints the message supplied by the user
or prints that msg is not in the form of string
printMessage??
printMessage("This is the message")
This is the message
printMessage(23)
Your input argument is not string Here is the type of what you have supplied : <class 'int'>
y = "hellow there"
printMessage(y)
hellow there
#multiple arguments
def mypow(a,b):
"""this function compute power just like builtin pow function"""
c = a**b
print(c)
mypow?
mypow(3,4)
81
def checkArgs(a,b,c):
if isinstance(a,(int,float)) and isinstance(b,(int,float)) and isinstance(c,(int,float)):
print((a+b+c)**2)
else:
print("Error: the input arguments are not of the expected types")
checkArgs(3,4,5)
144
checkArgs(3,4,"g")
Error: the input arguments are not of the expected types
checkArgs(3,4)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[177], line 1 ----> 1 checkArgs(3,4) TypeError: checkArgs() missing 1 required positional argument: 'c'
checkArgs(2,3,4,5)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[178], line 1 ----> 1 checkArgs(2,3,4,5) TypeError: checkArgs() takes 3 positional arguments but 4 were given
#Order of input arguments
def f(a,b,c):
print("A is :", a)
print("B is :", b)
print("C is :", c)
#f(2,3,"game")
f(3,"game",2)
A is : 3 B is : game C is : 2
#f(a = 2, b = 3, c ="game")
f(c = "game", a = 2, b = 3)
A is : 2 B is : 3 C is : game
#Function(return statement)
def myadd(a,b):
sumValue = a+b
return sumValue
d = myadd(2,3)
print(d)
5
variableOutSideTheFunction = 3
def g():
variableOutSideTheFunction = 5
print(variableOutSideTheFunction) # inside the function
g()
5
print(variableOutSideTheFunction) # outside the function
3
def g():
variableOutSideTheFunction = 5
#print(variableOutSideTheFunction) # inside the function
print(g()) #if print isn't set inside the function, then none will be result when print(that func()).
print(type(g()))
None <class 'NoneType'>
def h():
print("A")
a = 3
b = 5
c = a+b
print("something")
return # Like this if I dont return anything, it means exit the function right away
print("B")
print("C")
print(h())
A something None
def h():
print("A")
a = 3
b = 5
c = a+b
print("something")
return c
print("B")
print("C")
print(type(h()))
A something <class 'int'>
def r():
a = 5
b = 7
d = "something"
return a,b,d #can return multiple values
x,y,z = r()
print(x,y,z)
5 7 something
def myAddUniversal(*args):
s = 0
for i in range(len(args)):
s += args[i] #s = s+args[i]
return s
print(myAddUniversal(2,4,5))
11
def printAllVariableNamesAndValues(**args):# ** you'll receive key value pair list
for x in args:
print("Variable Name is :", x,"and Value is :", args[x])
printAllVariableNamesAndValues(a = 3, b = "B", c = "CCC", y = 6.7)
Variable Name is : a and Value is : 3 Variable Name is : b and Value is : B Variable Name is : c and Value is : CCC Variable Name is : y and Value is : 6.7
#default value
def gg(s=4):
print(s) #once press shift+enter, s is assigned as 4
gg()
4
gg(56)
56
L = [1,2,3]
L2 = L
L2[0] = -9
print(L)
[-9, 2, 3]
def ff(L =[1,2]):
for i in L:
print(i)
L2 = [12,3,4]
ff()
1 2
ff(L2)
12 3 4
ff() #already default value is assigned as L = [1,2]
1 2
#Modules
import sys
sys.path.append('/Users/soyeonpark/ABC')
#import all functions
import my_universal_functions as myfs
#import one function
#from my_universal_functions import addAllNumerics
myfs.addAllNumerics??
c = myfs.addAllNumerics(2,3,4,6)
print(c)
15
myfs.myName
'Python Cours'
""" Given a list of numbers i.e. [1,2,4,-5,7,9,3,2], make another list
that contains all the items in sorted order from min to max. i.e. your
result will be another list like [-5,1,2,2,3,4,7,9]
"""
' Given a list of numbers i.e. [1,2,4,-5,7,9,3,2], make another list\nthat contains all the items in sorted order from min to max. i.e. your \nresult will be another list like [-5,1,2,2,3,4,7,9]\n'
def findMin(L, startIndx):
m = L[startIndx]
idx = startIndx
for i in range(startIndx,len(L)):
x = L[i]
if x<m:
m = x
idx = i
else:
pass
i += 1
return m, idx
a,b = findMin([2,3,4,0,9])
print(a,b)
0 3
def swapValues(L,idx1,idx2):
tmp = L[idx1]
L[idx1] = L[idx2]
L[idx2] = tmp
return L
L = [2,3,6,7]
L2 = swapValues(L,1,3)
print(L2)
[2, 7, 6, 3]
from my_universal_functions import checkIfNotNumeric
def sortList(L):
if not(checkIfNotNumeric2(L)):
print("Error: List does not contain numeric values")
return
else:
c = 0
for x in L:
m, idx = findMin(L, c)
L = swapValues(L,c,idx)
c += 1
return L
L2 = sortList([2,1,5,3,-8,17])
print(L2)
[-8, 1, 2, 3, 5, 17]
checkIfNotNumeric??
checkIfNotNumeric2([2,1,5,3,-8,17])
True
def checkIfNotNumeric2(L):
for x in L:
if not(isinstance(x, (int, float))):
return False
return True
#String
s = "Python is a good language"
t = 'Its good for data science'
type(s)
str
print(s)
Python is a good language
print("hellow", 12, "hellow2", 'who are you', 5.9)
hellow 12 hellow2 who are you 5.9
v = s + " " + t #str+str+str
print(v)
Python is a good language Its good for data science
price = 12
s = "The price of this book"
v = s + ' is: '+ str(price) #str+str+int X --> str(int)
print(v)
print(s,"is:", price) #print() automatically add space between arguments/ in thic case, don't need to chage the type
The price of this book is: 12 The price of this book is: 12
#String(Multi line String)
a = """this is line 1
this is line 2
this is last line and this line is 3"""
print(a)
this is line 1 this is line 2 this is last line and this line is 3
print(""" The following options are available:
-a :does nothing
-b :also does nothing
""")
The following options are available:
-a :does nothing
-b :also does nothing
s = "How are you and who are you"
print(s[5])
r
print(type(s[5]))
<class 'str'>
s[3:8]
' are '
s[-1] #negative index is starting from right / -1 is the last letter
'u'
s[-12:-3]
' who are '
s[1] = "e" # it's not possible. it's inmutable(unchanable) once the string is declared
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[294], line 1 ----> 1 s[1] = "e" TypeError: 'str' object does not support item assignment
s[0:12:2] # it skips with the step size 2 till 12(excluded) s[start:end:step]
'Hwaeyu'
s
'How are you and who are you'
s[:12]
'How are you '
s[3:]
' are you and who are you'
s[1:12]
'ow are you '
s[::-1] # reverse the way
'uoy era ohw dna uoy era woH'
print(len(a))
66
print(len(a[3:8]))
5
a = " abc def hgq asdgeg"
b = a.strip() #remove space in the beginning and end
print(b)
abc def hgq asdgeg
a = "ABC deFg ;; sadfa QF"
b = a.lower() #change all the characters into lower case
print(b)
abc defg ;; sadfa qf
c = a.upper() # change to upper case
print(c)
ABC DEFG ;; SADFA QF
d = a.replace(";","*") #replace the first one to the second one
print(d)
ABC deFg ** sadfa QF
d = a.replace(";","**&&^^%%")
print(d)
ABC deFg **&&^^%%**&&^^%% sadfa QF
d = a.replace(";;","two semi colons")
print(d)
ABC deFg two semi colons sadfa QF
a = "abc;def;hgydfa;yy23" # want to separate them
L = a.split(";") #split elements by the standard " "
print(L) #abc is one element
['abc', 'def', 'hgydfa', 'yy23']
L[1]
'def'
#. and tap button --> appear all the functions that can be used
print(a.capitalize())
Abc;def;hgydfa;yy23
"abdAfadfGGQ".capitalize()
'Abdafadfggq'
help(a.count)
Help on built-in function count:
count(...) method of builtins.str instance
S.count(sub[, start[, end]]) -> int
Return the number of non-overlapping occurrences of substring sub in
string S[start:end]. Optional arguments start and end are
interpreted as in slice notation.
"abc" in "asdfsafsjflskfjabclskjf"
True
"abc" == "abc" #can use == at string
True
"abc" < "def" #what does it mean? --> following alphabet order in Python
True
"$%" < "*&"
True
"acd" not in "ackljlkfj"
True
print("we are learning "string" here")
Cell In[323], line 1 print("we are learning "string" here") ^ SyntaxError: invalid syntax. Perhaps you forgot a comma?
print("we are learning \"string\"here") #use backslash
we are learning "string"here
print('we are learning "string" here') # or use single quot
we are learning "string" here
print("we are \n now on another line") # for another line, use \n
we are now on another line
print("we are \t now on another line")
we are now on another line
print("c:\name\drive") # here \n acts like for another line
c: ame\drive
print(r"c:\name\drive") # use r ahead to accept it as a raw string
c:\name\drive
#List
L = [1,3,4.9,"name",3]
#Tuple
T = (1,3,4.9,"name",3)
#Set
S = {1,3,4.9,"name",3}
#Dictionary
D = {23:"twothree", 'B':43, 'C':'CCD'}
print("The type of L is ", type(L))
print("The type of T is ", type(T))
print("The type of S is ", type(S))
print("The type of D is ", type(D))
The type of L is <class 'list'> The type of T is <class 'tuple'> The type of S is <class 'set'> The type of D is <class 'dict'>
print(L[1])
print(T[1])
print(3 in S)
print(D[23])
3 3 True twothree
print(D['B'])
43
S # no duplicate
{1, 3, 4.9, 'name'}
L
[1, 3, 4.9, 'name', 3]
L[1:3]
[3, 4.9]
L[::-1]
[3, 'name', 4.9, 3, 1]
T[:3]
(1, 3, 4.9)
L = L + ["how", "are", 6, "you"] #add element
L
[1, 3, 4.9, 'name', 3, 'how', 'are', 6, 'you']
L.append(6.8) #add element
L
[1, 3, 4.9, 'name', 3, 'how', 'are', 6, 'you', 6.8]
T2 = ('a', 'b', 45)
T3 = T + T2 #combine tuples
T3
(1, 3, 4.9, 'name', 3, 'a', 'b', 45)
S
{1, 3, 4.9, 'name'}
S.add(56) #add element
S
{1, 3, 4.9, 56, 'name'}
S.update({23,"game",1}) #add multiple elements
S
{1, 23, 3, 4.9, 56, 'game', 'name'}
D
{23: 'twothree', 'B': 43, 'C': 'CCD'}
D['newKey'] = "newValue"
D
{23: 'twothree', 'B': 43, 'C': 'CCD', 'newKey': 'newValue'}
D2 = {"y":"YY", "z":10}
D3 = D + D2 #not possible to combine Dictionaries /But can update like D.update(D2)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[370], line 1 ----> 1 D3 = D + D2 #not possible to combine Dictionaries 3 D.update(D2) TypeError: unsupported operand type(s) for +: 'dict' and 'dict'
L
[1, 3, 4.9, 'name', 3, 'how', 'are', 6, 'you', 6.8]
del L[3] #delete element
L
[1, 3, 4.9, 3, 'how', 'are', 6, 'you', 6.8]
S
{1, 23, 3, 4.9, 56, 'game', 'name'}
S.remove('game')
S
{1, 23, 3, 4.9, 56, 'name'}
D
{23: 'twothree', 'B': 43, 'C': 'CCD', 'newKey': 'newValue'}
del D['C']
D
{23: 'twothree', 'B': 43, 'newKey': 'newValue'}
#copy function
L
[1, 3, 4.9, 3, 'how', 'are', 6, 'you', 6.8]
L2 = L
L2 #is saved in the same memory. so if we change one element in one list then the other is also changed
[1, 3, 4.9, 3, 'how', 'are', 6, 'you', 6.8]
L2[2] = "four point nine"
L2
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
L
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
L2 = L.copy() # should use copy function to save different memory/ Same at Set, Dictionary (Tuple doesn't use it)
L2
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
L
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
L2[1] = 'one'
L2
[1, 'one', 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
L
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
L3 = L[1:5] #L3 is completely in new memory
L3
[3, 'four point nine', 3, 'how']
L3[0] = "three"
L
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
help(L.append)
Help on built-in function append:
append(object, /) method of builtins.list instance
Append object to the end of the list.
L.clear?
L.pop?
L.reverse()
L
[6.8, 'you', 6, 'are', 'how', 3, 'four point nine', 3, 1]
L[::-1]
[1, 3, 'four point nine', 3, 'how', 'are', 6, 'you', 6.8]
D.items?
L
[6.8, 'you', 6, 'are', 'how', 3, 'four point nine', 3, 1]
T
(1, 3, 4.9, 'name', 3)
S
{1, 23, 3, 4.9, 56, 'name'}
D
{23: 'twothree', 'B': 43, 'newKey': 'newValue'}
D2 = {'A':L, 'B':T, 'C':S, 'D':D} # one data structure can have the other structure as element
D2['A'][3] #at the Value of A, want to know the third position
'are'
K = D2['D']
K
{23: 'twothree', 'B': 43, 'newKey': 'newValue'}
for x in K:
print(x,K[x])
23 twothree B 43 newKey newValue
L3 = [L,T,D,23,"game"] # it's also possible
type(L3[2])
dict
L3 = [x**2 for x in range(10)]
L3
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
S3 = {x**2 for x in range(2,20,3)}
S3
{4, 25, 64, 121, 196, 289}
"""Let say you are a teacher and you have different student
records containing id fo a student and the marks list in each subject
where different students have taken different number of subjects .All
these records are in hard copy. You want to enter all the data in computer
and want to compute the average marks of each student and display"""
def getDataFromUser():
D = {}
while True:
studentId = input("Enter student ID: ")
marksList = input("Enter the marks by comma separated values: ")
moreStudents = input('Enter "no" to quit insertion: ')
if studentId in D:
print(studentId, "is already inserted")
else:
D[studentId] = marksList.split(",")
if moreStudents.lower() == "no":
return D
studentData = getDataFromUser()
Enter student ID: 12 Enter the marks by comma separated values: 56,45,13 Enter "no" to quit insertion: asdf Enter student ID: 45 Enter the marks by comma separated values: 44,55,66,77,4 Enter "no" to quit insertion: asdfa Enter student ID: 12 Enter the marks by comma separated values: 45,45 Enter "no" to quit insertion: asdfg 12 is already inserted Enter student ID: 23 Enter the marks by comma separated values: 45,45 Enter "no" to quit insertion: no
studentData
{'12': ['56', '45', '13'],
'45': ['44', '55', '66', '77', '4'],
'23': ['45', '45']}
def getAvgMarks(D):
avgMarks = {}
for x in D:
L = D[x]
s = 0
for marks in L:
s += int(marks)
avgMarks[x] = s/len(L)
return avgMarks
avgM = getAvgMarks(studentData)
avgM
{'12': 38.0, '45': 49.2, '23': 45.0}
for x in avgM:
print("Student :", x, "got avg Marks as: ", avgM[x])
Student : 12 got avg Marks as: 38.0 Student : 45 got avg Marks as: 49.2 Student : 23 got avg Marks as: 45.0
import numpy as np
a = np.array([1,2,3,5,7]) #can define data type as well / i = integer
b = np.array((2,3,5), dtype= 'f') # f = float
print(a)
[1 2 3 5 7]
type(a)
numpy.ndarray
print(b)
[2 3 5]
type(b)
numpy.ndarray
a.dtype #what's the data type of a?
dtype('int32')
b.dtype #what's the data type of b?
dtype('float32')
#Numpy(Dimension)
import numpy as np
a = np.array([[1,2,3],[4,5,6]]) #2 dimensional array
a.ndim
2
a[0,2]
3
B = np.array([[1,2,3],[2,4,5,9]])
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[452], line 1 ----> 1 B = np.array([[1,2,3],[2,4,5,9]]) ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.
B.ndim
1
C = np.array([[[1,2,3],[4,5,6],[0,0,-1]],[[-1,-2,-3],[-4,-5,-6],[0,0,1]]])
C.ndim
3
C.shape # each 2 dimensional array has 3 array which have 3 items
(2, 3, 3)
C[1,0,2]
-3
C.shape[2]
3
A = np.array([2]) # []
A.ndim
1
B = np.array(3) # no array due to doenst have []
B.ndim
0
C.size # total number of elements
18
C.nbytes #how many totals number of bytes
144
A = np.arange(100) #np.arange()
print(A)
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99]
A = np.arange(20,100,3) #(start, last(excluded), size) like for i in range(20,100,3)
print(A)
[20 23 26 29 32 35 38 41 44 47 50 53 56 59 62 65 68 71 74 77 80 83 86 89 92 95 98]
print(range(10)) #range() never create a list
range(0, 10)
print(list(range(10))) # if you want list then write list
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
A = np.random.permutation(np.arange(10)) #the arrage will be shown as randomly
print(A)
[7 1 3 6 2 9 0 4 8 5]
np.random.randint?
v = np.random.randint(20,30) #some random integer btw 20 and 30
type(v)
int
print(v)
21
print(v)
21
A = np.random.rand(1000) # random number btw 0 to 999
import matplotlib.pyplot as plt
plt.hist(A, bins=100)
(array([ 8., 11., 10., 12., 8., 14., 11., 12., 6., 10., 4., 10., 7.,
14., 11., 8., 8., 7., 8., 16., 15., 8., 6., 9., 14., 9.,
10., 16., 7., 9., 8., 7., 12., 11., 10., 14., 7., 14., 11.,
4., 11., 11., 9., 15., 13., 9., 10., 12., 12., 13., 10., 7.,
10., 7., 9., 8., 10., 8., 5., 9., 6., 6., 8., 12., 8.,
17., 14., 7., 7., 10., 14., 7., 9., 12., 13., 14., 5., 11.,
10., 11., 14., 9., 11., 16., 8., 7., 8., 12., 18., 7., 4.,
7., 13., 9., 8., 12., 11., 14., 11., 6.]),
array([1.81229342e-04, 1.01664234e-02, 2.01516174e-02, 3.01368115e-02,
4.01220055e-02, 5.01071996e-02, 6.00923936e-02, 7.00775877e-02,
8.00627817e-02, 9.00479758e-02, 1.00033170e-01, 1.10018364e-01,
1.20003558e-01, 1.29988752e-01, 1.39973946e-01, 1.49959140e-01,
1.59944334e-01, 1.69929528e-01, 1.79914722e-01, 1.89899916e-01,
1.99885110e-01, 2.09870304e-01, 2.19855498e-01, 2.29840692e-01,
2.39825886e-01, 2.49811081e-01, 2.59796275e-01, 2.69781469e-01,
2.79766663e-01, 2.89751857e-01, 2.99737051e-01, 3.09722245e-01,
3.19707439e-01, 3.29692633e-01, 3.39677827e-01, 3.49663021e-01,
3.59648215e-01, 3.69633409e-01, 3.79618603e-01, 3.89603797e-01,
3.99588991e-01, 4.09574185e-01, 4.19559379e-01, 4.29544573e-01,
4.39529767e-01, 4.49514961e-01, 4.59500155e-01, 4.69485350e-01,
4.79470544e-01, 4.89455738e-01, 4.99440932e-01, 5.09426126e-01,
5.19411320e-01, 5.29396514e-01, 5.39381708e-01, 5.49366902e-01,
5.59352096e-01, 5.69337290e-01, 5.79322484e-01, 5.89307678e-01,
5.99292872e-01, 6.09278066e-01, 6.19263260e-01, 6.29248454e-01,
6.39233648e-01, 6.49218842e-01, 6.59204036e-01, 6.69189230e-01,
6.79174425e-01, 6.89159619e-01, 6.99144813e-01, 7.09130007e-01,
7.19115201e-01, 7.29100395e-01, 7.39085589e-01, 7.49070783e-01,
7.59055977e-01, 7.69041171e-01, 7.79026365e-01, 7.89011559e-01,
7.98996753e-01, 8.08981947e-01, 8.18967141e-01, 8.28952335e-01,
8.38937529e-01, 8.48922723e-01, 8.58907917e-01, 8.68893111e-01,
8.78878305e-01, 8.88863499e-01, 8.98848694e-01, 9.08833888e-01,
9.18819082e-01, 9.28804276e-01, 9.38789470e-01, 9.48774664e-01,
9.58759858e-01, 9.68745052e-01, 9.78730246e-01, 9.88715440e-01,
9.98700634e-01]),
<BarContainer object of 100 artists>)
B = np.random.randn(10000)
plt.hist(B, bins=200)
(array([ 1., 0., 1., 0., 2., 2., 1., 1., 0., 2., 2.,
0., 0., 5., 4., 1., 5., 2., 5., 3., 4., 7.,
7., 5., 7., 2., 12., 8., 6., 11., 10., 10., 13.,
9., 11., 23., 19., 24., 17., 18., 30., 37., 29., 31.,
25., 38., 29., 37., 39., 30., 45., 49., 61., 49., 58.,
54., 65., 44., 78., 64., 68., 82., 73., 85., 82., 84.,
98., 86., 104., 88., 105., 103., 112., 102., 107., 129., 141.,
148., 130., 130., 119., 134., 134., 132., 157., 146., 150., 144.,
148., 151., 151., 170., 164., 145., 124., 148., 161., 146., 129.,
133., 155., 151., 134., 134., 138., 124., 138., 119., 112., 119.,
112., 116., 117., 109., 103., 112., 119., 96., 98., 83., 76.,
87., 71., 79., 67., 64., 58., 62., 52., 70., 66., 51.,
53., 53., 43., 33., 48., 39., 38., 33., 27., 33., 32.,
22., 21., 22., 13., 13., 21., 15., 12., 16., 17., 10.,
15., 6., 7., 5., 4., 11., 4., 2., 6., 7., 6.,
3., 0., 3., 2., 1., 3., 4., 0., 2., 1., 1.,
1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 1.]),
array([-3.46107023, -3.42369044, -3.38631066, -3.34893087, -3.31155109,
-3.2741713 , -3.23679152, -3.19941173, -3.16203195, -3.12465216,
-3.08727238, -3.0498926 , -3.01251281, -2.97513303, -2.93775324,
-2.90037346, -2.86299367, -2.82561389, -2.7882341 , -2.75085432,
-2.71347454, -2.67609475, -2.63871497, -2.60133518, -2.5639554 ,
-2.52657561, -2.48919583, -2.45181604, -2.41443626, -2.37705647,
-2.33967669, -2.30229691, -2.26491712, -2.22753734, -2.19015755,
-2.15277777, -2.11539798, -2.0780182 , -2.04063841, -2.00325863,
-1.96587884, -1.92849906, -1.89111928, -1.85373949, -1.81635971,
-1.77897992, -1.74160014, -1.70422035, -1.66684057, -1.62946078,
-1.592081 , -1.55470122, -1.51732143, -1.47994165, -1.44256186,
-1.40518208, -1.36780229, -1.33042251, -1.29304272, -1.25566294,
-1.21828315, -1.18090337, -1.14352359, -1.1061438 , -1.06876402,
-1.03138423, -0.99400445, -0.95662466, -0.91924488, -0.88186509,
-0.84448531, -0.80710552, -0.76972574, -0.73234596, -0.69496617,
-0.65758639, -0.6202066 , -0.58282682, -0.54544703, -0.50806725,
-0.47068746, -0.43330768, -0.3959279 , -0.35854811, -0.32116833,
-0.28378854, -0.24640876, -0.20902897, -0.17164919, -0.1342694 ,
-0.09688962, -0.05950983, -0.02213005, 0.01524973, 0.05262952,
0.0900093 , 0.12738909, 0.16476887, 0.20214866, 0.23952844,
0.27690823, 0.31428801, 0.35166779, 0.38904758, 0.42642736,
0.46380715, 0.50118693, 0.53856672, 0.5759465 , 0.61332629,
0.65070607, 0.68808586, 0.72546564, 0.76284542, 0.80022521,
0.83760499, 0.87498478, 0.91236456, 0.94974435, 0.98712413,
1.02450392, 1.0618837 , 1.09926349, 1.13664327, 1.17402305,
1.21140284, 1.24878262, 1.28616241, 1.32354219, 1.36092198,
1.39830176, 1.43568155, 1.47306133, 1.51044111, 1.5478209 ,
1.58520068, 1.62258047, 1.65996025, 1.69734004, 1.73471982,
1.77209961, 1.80947939, 1.84685918, 1.88423896, 1.92161874,
1.95899853, 1.99637831, 2.0337581 , 2.07113788, 2.10851767,
2.14589745, 2.18327724, 2.22065702, 2.25803681, 2.29541659,
2.33279637, 2.37017616, 2.40755594, 2.44493573, 2.48231551,
2.5196953 , 2.55707508, 2.59445487, 2.63183465, 2.66921443,
2.70659422, 2.743974 , 2.78135379, 2.81873357, 2.85611336,
2.89349314, 2.93087293, 2.96825271, 3.0056325 , 3.04301228,
3.08039206, 3.11777185, 3.15515163, 3.19253142, 3.2299112 ,
3.26729099, 3.30467077, 3.34205056, 3.37943034, 3.41681013,
3.45418991, 3.49156969, 3.52894948, 3.56632926, 3.60370905,
3.64108883, 3.67846862, 3.7158484 , 3.75322819, 3.79060797,
3.82798775, 3.86536754, 3.90274732, 3.94012711, 3.97750689,
4.01488668]),
<BarContainer object of 200 artists>)
C = np.random.rand(2,3) #creat random dimensional array
C
array([[0.28479749, 0.01496218, 0.21795265],
[0.29856608, 0.95685224, 0.28837714]])
C.ndim
2
C = np.random.rand(2,3,4,2)
C.ndim
4
C
array([[[[0.19808352, 0.90750045],
[0.13427161, 0.90885532],
[0.72531304, 0.67907578],
[0.82546805, 0.83697186]],
[[0.58072095, 0.80740941],
[0.71044031, 0.89191532],
[0.83809592, 0.12233693],
[0.61687587, 0.40968328]],
[[0.81907427, 0.41232916],
[0.16349493, 0.43935106],
[0.12297962, 0.00511371],
[0.30436043, 0.38564601]]],
[[[0.83505318, 0.75894666],
[0.79148087, 0.04404956],
[0.4473328 , 0.674635 ],
[0.22660527, 0.51864064]],
[[0.16631482, 0.44773438],
[0.76406986, 0.98746421],
[0.17695519, 0.62339607],
[0.78282051, 0.74379242]],
[[0.39200303, 0.76973911],
[0.89048465, 0.62529171],
[0.83263492, 0.4479187 ],
[0.38048678, 0.84980509]]]])
D = np.arange(100).reshape(4,25) #reshape arrange following (a,b)
D.shape
(4, 25)
D
array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24],
[25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49],
[50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
66, 67, 68, 69, 70, 71, 72, 73, 74],
[75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99]])
D = np.arange(100).reshape(4,5,5)
D.shape
(4, 5, 5)
D
array([[[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24]],
[[25, 26, 27, 28, 29],
[30, 31, 32, 33, 34],
[35, 36, 37, 38, 39],
[40, 41, 42, 43, 44],
[45, 46, 47, 48, 49]],
[[50, 51, 52, 53, 54],
[55, 56, 57, 58, 59],
[60, 61, 62, 63, 64],
[65, 66, 67, 68, 69],
[70, 71, 72, 73, 74]],
[[75, 76, 77, 78, 79],
[80, 81, 82, 83, 84],
[85, 86, 87, 88, 89],
[90, 91, 92, 93, 94],
[95, 96, 97, 98, 99]]])
np.zeros?
np.ones?
#Numpy(Slicing)
A = np.arange(100)
print(A)
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99]
b = A[3:10]
print(b)
[3 4 5 6 7 8 9]
b[0] = -1200
b
array([-1200, 4, 5, 6, 7, 8, 9])
A #it's also changed because it's in the same memory / That's the big difference btw ordinary list and slicing
array([ 0, 1, 2, -1200, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 62,
63, 64, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89,
90, 91, 92, 93, 94, 95, 96, 97, 98,
99])
b = A[3:10].copy() # if want not to change / it's in the different memory
A[::5]
array([ 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80,
85, 90, 95])
A[::-5]
array([99, 94, 89, 84, 79, 74, 69, 64, 59, 54, 49, 44, 39, 34, 29, 24, 19,
14, 9, 4])
A[::-1]
array([ 99, 98, 97, 96, 95, 94, 93, 92, 91,
90, 89, 88, 87, 86, 85, 84, 83, 82,
81, 80, 79, 78, 77, 76, 75, 74, 73,
72, 71, 70, 69, 68, 67, 66, 65, 64,
63, 62, 61, 60, 59, 58, 57, 56, 55,
54, 53, 52, 51, 50, 49, 48, 47, 46,
45, 44, 43, 42, 41, 40, 39, 38, 37,
36, 35, 34, 33, 32, 31, 30, 29, 28,
27, 26, 25, 24, 23, 22, 21, 20, 19,
18, 17, 16, 15, 14, 13, 12, 11, 10,
9, 8, 7, 6, 5, 4, -1200, 2, 1,
0])
#I want to find where is the position of -1200
B = (A == -1200)*np.arange(A.size)
print(B)
[0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
A.indices(-1200)
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In[574], line 1 ----> 1 A.indices(-1200) AttributeError: 'numpy.ndarray' object has no attribute 'indices'
idx = np.argwhere(A==-1200)[0][0]
idx
3
A[idx] = 3
A
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])
A = np.round(10*np.random.rand(5,4))
#A = np.random.rand(5,4)
#A
array([[0.4183266 , 0.01855416, 0.25106059, 0.86243359],
[0.16981521, 0.31147646, 0.95850589, 0.55175295],
[0.15552773, 0.10055072, 0.06009027, 0.82065224],
[0.95136422, 0.74344842, 0.26952486, 0.02808109],
[0.84601038, 0.01401601, 0.39694368, 0.2467477 ]])
A
array([[ 4., 1., 5., 5.],
[ 3., 1., 7., 4.],
[ 2., 8., 0., 7.],
[ 3., 0., 10., 10.],
[ 6., 8., 1., 7.]])
A[1,2]
7.0
A[1,:] # for whole second row
array([3., 1., 7., 4.])
A[:,1] # for whole second column
array([1., 1., 8., 0., 8.])
Z = A[1:3,2:4] #row number 1 to row number 3 & column number 2 to column number 4
Z
array([[7., 4.],
[0., 7.]])
Z.T # transpose 행과 열 바꾸기
array([[7., 0.],
[4., 7.]])
import numpy.linalg as la #linear algebra library 역행렬구하기
la.inv(np.random.rand(3,3))
array([[-0.82473966, 4.33580269, -0.46464079],
[ 1.50081031, 0.34083488, -0.36621829],
[-0.8377294 , -1.67459439, 1.4754175 ]])
Z
array([[1., 5.],
[5., 7.]])
Za = Z.sort(axis=0) #행 축으로 정렬 # 여기부터 다시 해보기
Za
A.sort(axis=1) #열 축으로 정렬
A
array([[0., 0., 1., 2.],
[1., 1., 3., 3.],
[2., 4., 4., 4.],
[5., 6., 6., 8.],
[7., 8., 8., 9.]])
#Numpy(More Indexing)
A = np.arange(100)
B = A[[3,5,6]]
B
array([3, 5, 6])
B[0] = -4
B
array([-4, 5, 6])
A # A doesn't change becuase we use B = A[[3,5,6]]
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])
B = A[A<40] # access all elements less than 40
B
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39])
B = A[(A<40) & (A>30)] # btw 30 and 40
B
array([31, 32, 33, 34, 35, 36, 37, 38, 39])
#& for list / and for single object
#/ for list / or for single object
#~ for list/ not for single object
#Numpy(Broadcasting)
A = np.round(10*np.random.rand(2,3))
A
array([[2., 8., 3.],
[5., 4., 1.]])
A+3
array([[ 5., 11., 6.],
[ 8., 7., 4.]])
A+(np.arange(2).reshape(2,1))
array([[2., 8., 3.],
[6., 5., 2.]])
print(np.arange(2))
[0 1]
#stacks
B= np.round(10*np.random.rand(2,2))
A
array([[2., 8., 3.],
[5., 4., 1.]])
B
array([[5., 9.],
[4., 4.]])
C = np.hstack((A,B)) #가로로
C
array([[2., 8., 3., 5., 9.],
[5., 4., 1., 4., 4.]])
A = np.random.permutation(np.arange(10))
A
array([2, 3, 8, 6, 4, 5, 7, 1, 0, 9])
A.sort() #ascending 오름차순
A
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
np.sort(A)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
A.sort()
A=A[::-1] #descending 내림차순
A
array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])
A = np.array(["abc",'howare you','u765','132r'])
A.sort() # possible in strings like alphabet order
A
array(['132r', 'abc', 'howare you', 'u765'], dtype='<U10')
#Numpy(Speed: ufuncs) numpy function is faster for large number/elements
B= np.random.rand(1000000)
%timeit sum(B)
%timeit np.sum(B) #same with B.sum()
38.4 ms ± 144 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) 184 µs ± 1.86 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
def mySum(G):
s = 0
for x in G:
s+=x
return s
%timeit mySum(B)
43.5 ms ± 506 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
import pandas as pd
print(pd.__version__)
1.5.3
A = pd.Series([2,3,4,5], index = ['a','b','c','d']) # Series handle one dimensional array
A.values
array([2, 3, 4, 5])
type(A.values)
numpy.ndarray
type(A)
pandas.core.series.Series
A.index
Index(['a', 'b', 'c', 'd'], dtype='object')
A['a']
2
A['a':'c'] #using idex then the final idex is also included
a 2 b 3 c 4 dtype: int64
#Pandas(Series) using dictionary
grads_dict={'A':4, 'B':3.5, 'C':3, 'D':2.5}
grads = pd.Series(grads_dict)
grads.values
array([4. , 3.5, 3. , 2.5])
marks_dict = {'A':85, 'B':75, 'C':65, 'D':55}
marks = pd.Series(marks_dict)
marks
A 85 B 75 C 65 D 55 dtype: int64
marks['A']
85
marks[0:2]
A 85 B 75 dtype: int64
#Pandas(DataFrame)
D = pd.DataFrame({'Marks':marks, 'Grades':grads})
D
| Marks | Grades | |
|---|---|---|
| A | 85 | 4.0 |
| B | 75 | 3.5 |
| C | 65 | 3.0 |
| D | 55 | 2.5 |
D.T #can transpose as well
| A | B | C | D | |
|---|---|---|---|---|
| Marks | 85.0 | 75.0 | 65.0 | 55.0 |
| Grades | 4.0 | 3.5 | 3.0 | 2.5 |
D
| Marks | Grades | |
|---|---|---|
| A | 85 | 4.0 |
| B | 75 | 3.5 |
| C | 65 | 3.0 |
| D | 55 | 2.5 |
D.values
array([[85. , 4. ],
[75. , 3.5],
[65. , 3. ],
[55. , 2.5]])
D.values[2,0] #row #3 column #1
65.0
D.columns
Index(['Marks', 'Grades'], dtype='object')
D
| Marks | Grades | |
|---|---|---|
| A | 85 | 4.0 |
| B | 75 | 3.5 |
| C | 65 | 3.0 |
| D | 55 | 2.5 |
D['ScaledMarks'] = 100*(D['Marks']/90) # want to add column
D
| Marks | Grades | ScaledMarks | |
|---|---|---|---|
| A | 85 | 4.0 | 94.444444 |
| B | 75 | 3.5 | 83.333333 |
| C | 65 | 3.0 | 72.222222 |
| D | 55 | 2.5 | 61.111111 |
del D['ScaledMarks'] #want to delete column
D
| Marks | Grades | |
|---|---|---|
| A | 85 | 4.0 |
| B | 75 | 3.5 |
| C | 65 | 3.0 |
| D | 55 | 2.5 |
G = D[D['Marks']>70] # want to pick some data
G
| Marks | Grades | |
|---|---|---|
| A | 85 | 4.0 |
| B | 75 | 3.5 |
#Pandas(NaN) -- deal with missing values (None)
A = pd.DataFrame([{'a':1, 'b':4}, {'b':-3, 'c':9}])
A
| a | b | c | |
|---|---|---|---|
| 0 | 1.0 | 4 | NaN |
| 1 | NaN | -3 | 9.0 |
A.fillna(0) #fill all na value as 0
| a | b | c | |
|---|---|---|---|
| 0 | 1.0 | 4 | 0.0 |
| 1 | 0.0 | -3 | 9.0 |
A.dropna #drop all the missing values
<bound method DataFrame.dropna of a b c 0 1.0 4 NaN 1 NaN -3 9.0>
#Pandas(Indexing)
A = pd.Series(['a','b','c'], index = [1, 3, 5])
A[1]
'a'
A[1:3]
3 b 5 c dtype: object
A.loc[1:3] #loc : use explicit indexes
1 a 3 b dtype: object
A.iloc[1:3] #iloc: use implicit indexes 기본적으로 첫 번째 값 위치 0
3 b 5 c dtype: object
D
| Marks | Grades | |
|---|---|---|
| A | 85 | 4.0 |
| B | 75 | 3.5 |
| C | 65 | 3.0 |
| D | 55 | 2.5 |
D.iloc[2,:]
Marks 65.0 Grades 3.0 Name: C, dtype: float64
D.iloc[::-1,:] # want to reverse all the values
| Marks | Grades | |
|---|---|---|
| D | 55 | 2.5 |
| C | 65 | 3.0 |
| B | 75 | 3.5 |
| A | 85 | 4.0 |
#Pandas(csv files)
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
df = pd.read_csv('/Users/soyeonpark/Downloads/covid_19_data.csv')
df.head(10) #first 10 records
| SNo | ObservationDate | Province/State | Country/Region | Last Update | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 01/22/2020 | Anhui | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 1 | 2 | 01/22/2020 | Beijing | Mainland China | 1/22/2020 17:00 | 14.0 | 0.0 | 0.0 |
| 2 | 3 | 01/22/2020 | Chongqing | Mainland China | 1/22/2020 17:00 | 6.0 | 0.0 | 0.0 |
| 3 | 4 | 01/22/2020 | Fujian | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 4 | 5 | 01/22/2020 | Gansu | Mainland China | 1/22/2020 17:00 | 0.0 | 0.0 | 0.0 |
| 5 | 6 | 01/22/2020 | Guangdong | Mainland China | 1/22/2020 17:00 | 26.0 | 0.0 | 0.0 |
| 6 | 7 | 01/22/2020 | Guangxi | Mainland China | 1/22/2020 17:00 | 2.0 | 0.0 | 0.0 |
| 7 | 8 | 01/22/2020 | Guizhou | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 8 | 9 | 01/22/2020 | Hainan | Mainland China | 1/22/2020 17:00 | 4.0 | 0.0 | 0.0 |
| 9 | 10 | 01/22/2020 | Hebei | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
df.drop(['SNo', 'Last Update'], axis = 1, inplace = True)
# want to delete column. 'axis=1' is do that on the column and 'implace=True' is apply this to df
df.head()
| ObservationDate | Province/State | Country/Region | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|---|---|
| 0 | 01/22/2020 | Anhui | Mainland China | 1.0 | 0.0 | 0.0 |
| 1 | 01/22/2020 | Beijing | Mainland China | 14.0 | 0.0 | 0.0 |
| 2 | 01/22/2020 | Chongqing | Mainland China | 6.0 | 0.0 | 0.0 |
| 3 | 01/22/2020 | Fujian | Mainland China | 1.0 | 0.0 | 0.0 |
| 4 | 01/22/2020 | Gansu | Mainland China | 0.0 | 0.0 | 0.0 |
df.rename(columns = {'ObservationDate':'Date', 'Province/State':'Province', 'Country/Region':'Country'}, inplace = True)
#want to rename the column name
df.head()
| Date | Province | Country | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|---|---|
| 0 | 01/22/2020 | Anhui | Mainland China | 1.0 | 0.0 | 0.0 |
| 1 | 01/22/2020 | Beijing | Mainland China | 14.0 | 0.0 | 0.0 |
| 2 | 01/22/2020 | Chongqing | Mainland China | 6.0 | 0.0 | 0.0 |
| 3 | 01/22/2020 | Fujian | Mainland China | 1.0 | 0.0 | 0.0 |
| 4 | 01/22/2020 | Gansu | Mainland China | 0.0 | 0.0 | 0.0 |
df['Date'] = pd.to_datetime(df['Date'])
#convert date format to use in Pandas
df.head()
| Date | Province | Country | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | Anhui | Mainland China | 1.0 | 0.0 | 0.0 |
| 1 | 2020-01-22 | Beijing | Mainland China | 14.0 | 0.0 | 0.0 |
| 2 | 2020-01-22 | Chongqing | Mainland China | 6.0 | 0.0 | 0.0 |
| 3 | 2020-01-22 | Fujian | Mainland China | 1.0 | 0.0 | 0.0 |
| 4 | 2020-01-22 | Gansu | Mainland China | 0.0 | 0.0 | 0.0 |
df.describe()
| Confirmed | Deaths | Recovered | |
|---|---|---|---|
| count | 3.064290e+05 | 306429.000000 | 3.064290e+05 |
| mean | 8.567091e+04 | 2036.403268 | 5.042029e+04 |
| std | 2.775516e+05 | 6410.938048 | 2.015124e+05 |
| min | -3.028440e+05 | -178.000000 | -8.544050e+05 |
| 25% | 1.042000e+03 | 13.000000 | 1.100000e+01 |
| 50% | 1.037500e+04 | 192.000000 | 1.751000e+03 |
| 75% | 5.075200e+04 | 1322.000000 | 2.027000e+04 |
| max | 5.863138e+06 | 112385.000000 | 6.399531e+06 |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 306429 entries, 0 to 306428 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 306429 non-null datetime64[ns] 1 Province 228329 non-null object 2 Country 306429 non-null object 3 Confirmed 306429 non-null float64 4 Deaths 306429 non-null float64 5 Recovered 306429 non-null float64 dtypes: datetime64[ns](1), float64(3), object(2) memory usage: 14.0+ MB
df = df.fillna('NA') # fill in the blank as NA
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 306429 entries, 0 to 306428 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 306429 non-null datetime64[ns] 1 Province 306429 non-null object 2 Country 306429 non-null object 3 Confirmed 306429 non-null float64 4 Deaths 306429 non-null float64 5 Recovered 306429 non-null float64 dtypes: datetime64[ns](1), float64(3), object(2) memory usage: 14.0+ MB
df.head(10)
| Date | Province | Country | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | Anhui | Mainland China | 1.0 | 0.0 | 0.0 |
| 1 | 2020-01-22 | Beijing | Mainland China | 14.0 | 0.0 | 0.0 |
| 2 | 2020-01-22 | Chongqing | Mainland China | 6.0 | 0.0 | 0.0 |
| 3 | 2020-01-22 | Fujian | Mainland China | 1.0 | 0.0 | 0.0 |
| 4 | 2020-01-22 | Gansu | Mainland China | 0.0 | 0.0 | 0.0 |
| 5 | 2020-01-22 | Guangdong | Mainland China | 26.0 | 0.0 | 0.0 |
| 6 | 2020-01-22 | Guangxi | Mainland China | 2.0 | 0.0 | 0.0 |
| 7 | 2020-01-22 | Guizhou | Mainland China | 1.0 | 0.0 | 0.0 |
| 8 | 2020-01-22 | Hainan | Mainland China | 4.0 | 0.0 | 0.0 |
| 9 | 2020-01-22 | Hebei | Mainland China | 1.0 | 0.0 | 0.0 |
#EX:how many total confirmed case in each countries?
df2 = df.groupby('Country')[['Country','Confirmed','Deaths','Recovered']].sum(numeric_only=True).reset_index()
#Country 는 숫자 데이터가 아니라서 numeric_only=True 넣어줌
df2
| Country | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|
| 0 | Azerbaijan | 1.0 | 0.0 | 0.0 |
| 1 | ('St. Martin',) | 2.0 | 0.0 | 0.0 |
| 2 | Afghanistan | 17026442.0 | 669075.0 | 13464399.0 |
| 3 | Albania | 19768869.0 | 375955.0 | 13945256.0 |
| 4 | Algeria | 27684358.0 | 834464.0 | 18959299.0 |
| ... | ... | ... | ... | ... |
| 224 | West Bank and Gaza | 41819444.0 | 440378.0 | 37003116.0 |
| 225 | Yemen | 962066.0 | 237613.0 | 506523.0 |
| 226 | Zambia | 13493953.0 | 205990.0 | 12625626.0 |
| 227 | Zimbabwe | 6484581.0 | 237234.0 | 5594887.0 |
| 228 | occupied Palestinian territory | 25.0 | 0.0 | 0.0 |
229 rows × 4 columns
#grupby 'Country' and 'Date'
df2 = df.groupby(['Country','Date'])[['Country','Date','Confirmed','Deaths','Recovered']].sum(numeric_only=True).reset_index()
df2
| Country | Date | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|---|
| 0 | Azerbaijan | 2020-02-28 | 1.0 | 0.0 | 0.0 |
| 1 | ('St. Martin',) | 2020-03-10 | 2.0 | 0.0 | 0.0 |
| 2 | Afghanistan | 2020-02-24 | 1.0 | 0.0 | 0.0 |
| 3 | Afghanistan | 2020-02-25 | 1.0 | 0.0 | 0.0 |
| 4 | Afghanistan | 2020-02-26 | 1.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... |
| 87276 | occupied Palestinian territory | 2020-03-12 | 0.0 | 0.0 | 0.0 |
| 87277 | occupied Palestinian territory | 2020-03-14 | 0.0 | 0.0 | 0.0 |
| 87278 | occupied Palestinian territory | 2020-03-15 | 0.0 | 0.0 | 0.0 |
| 87279 | occupied Palestinian territory | 2020-03-16 | 0.0 | 0.0 | 0.0 |
| 87280 | occupied Palestinian territory | 2020-03-17 | 0.0 | 0.0 | 0.0 |
87281 rows × 5 columns
df3 = df2[df2['Confirmed']>100] #want case which have confirmed more than 100
df3
| Country | Date | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|---|
| 35 | Afghanistan | 2020-03-28 | 107.0 | 4.0 | 2.0 |
| 36 | Afghanistan | 2020-03-29 | 118.0 | 4.0 | 2.0 |
| 37 | Afghanistan | 2020-03-30 | 146.0 | 4.0 | 2.0 |
| 38 | Afghanistan | 2020-03-31 | 175.0 | 4.0 | 5.0 |
| 39 | Afghanistan | 2020-04-01 | 197.0 | 4.0 | 5.0 |
| ... | ... | ... | ... | ... | ... |
| 87269 | Zimbabwe | 2021-05-25 | 38706.0 | 1587.0 | 36517.0 |
| 87270 | Zimbabwe | 2021-05-26 | 38819.0 | 1589.0 | 36531.0 |
| 87271 | Zimbabwe | 2021-05-27 | 38854.0 | 1592.0 | 36541.0 |
| 87272 | Zimbabwe | 2021-05-28 | 38918.0 | 1592.0 | 36563.0 |
| 87273 | Zimbabwe | 2021-05-29 | 38933.0 | 1594.0 | 36578.0 |
75055 rows × 5 columns
import matplotlib.pyplot as plt
x = np.linspace(0,10,1000)
y = np.sin(x)
plt.plot(x,y)
[<matplotlib.lines.Line2D at 0x16a730dd0>]
plt.scatter(x[::10],y[::10], color = 'red')
<matplotlib.collections.PathCollection at 0x16a9261d0>
# can make two plot
plt.plot(x,y,color='b')
plt.plot(x,np.cos(x),color='g')
[<matplotlib.lines.Line2D at 0x16aacddd0>]
#Project Covid 19 using matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
df = pd.read_csv('/Users/soyeonpark/Downloads/covid_19_data.csv')
df
| SNo | ObservationDate | Province/State | Country/Region | Last Update | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 01/22/2020 | Anhui | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 1 | 2 | 01/22/2020 | Beijing | Mainland China | 1/22/2020 17:00 | 14.0 | 0.0 | 0.0 |
| 2 | 3 | 01/22/2020 | Chongqing | Mainland China | 1/22/2020 17:00 | 6.0 | 0.0 | 0.0 |
| 3 | 4 | 01/22/2020 | Fujian | Mainland China | 1/22/2020 17:00 | 1.0 | 0.0 | 0.0 |
| 4 | 5 | 01/22/2020 | Gansu | Mainland China | 1/22/2020 17:00 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 306424 | 306425 | 05/29/2021 | Zaporizhia Oblast | Ukraine | 2021-05-30 04:20:55 | 102641.0 | 2335.0 | 95289.0 |
| 306425 | 306426 | 05/29/2021 | Zeeland | Netherlands | 2021-05-30 04:20:55 | 29147.0 | 245.0 | 0.0 |
| 306426 | 306427 | 05/29/2021 | Zhejiang | Mainland China | 2021-05-30 04:20:55 | 1364.0 | 1.0 | 1324.0 |
| 306427 | 306428 | 05/29/2021 | Zhytomyr Oblast | Ukraine | 2021-05-30 04:20:55 | 87550.0 | 1738.0 | 83790.0 |
| 306428 | 306429 | 05/29/2021 | Zuid-Holland | Netherlands | 2021-05-30 04:20:55 | 391559.0 | 4252.0 | 0.0 |
306429 rows × 8 columns
df.drop(['SNo','Last Update'], axis=1, inplace=True)
df.rename(columns = {'ObservationDate':'Date', 'Province/State':'Province', 'Country/Region':'Country'}, inplace = True)
df.head()
| Date | Province | Country | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|---|---|
| 0 | 01/22/2020 | Anhui | Mainland China | 1.0 | 0.0 | 0.0 |
| 1 | 01/22/2020 | Beijing | Mainland China | 14.0 | 0.0 | 0.0 |
| 2 | 01/22/2020 | Chongqing | Mainland China | 6.0 | 0.0 | 0.0 |
| 3 | 01/22/2020 | Fujian | Mainland China | 1.0 | 0.0 | 0.0 |
| 4 | 01/22/2020 | Gansu | Mainland China | 0.0 | 0.0 | 0.0 |
df['Date'] = pd.to_datetime(df['Date'])
df.head()
| Date | Province | Country | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | Anhui | Mainland China | 1.0 | 0.0 | 0.0 |
| 1 | 2020-01-22 | Beijing | Mainland China | 14.0 | 0.0 | 0.0 |
| 2 | 2020-01-22 | Chongqing | Mainland China | 6.0 | 0.0 | 0.0 |
| 3 | 2020-01-22 | Fujian | Mainland China | 1.0 | 0.0 | 0.0 |
| 4 | 2020-01-22 | Gansu | Mainland China | 0.0 | 0.0 | 0.0 |
imputer = SimpleImputer(strategy='constant') #missing values 결측치처리 좀 더 알아보기
df2 = pd.DataFrame(imputer.fit_transform(df),columns=df.columns)
df2
| Date | Province | Country | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | Anhui | Mainland China | 1.0 | 0.0 | 0.0 |
| 1 | 2020-01-22 | Beijing | Mainland China | 14.0 | 0.0 | 0.0 |
| 2 | 2020-01-22 | Chongqing | Mainland China | 6.0 | 0.0 | 0.0 |
| 3 | 2020-01-22 | Fujian | Mainland China | 1.0 | 0.0 | 0.0 |
| 4 | 2020-01-22 | Gansu | Mainland China | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 306424 | 2021-05-29 | Zaporizhia Oblast | Ukraine | 102641.0 | 2335.0 | 95289.0 |
| 306425 | 2021-05-29 | Zeeland | Netherlands | 29147.0 | 245.0 | 0.0 |
| 306426 | 2021-05-29 | Zhejiang | Mainland China | 1364.0 | 1.0 | 1324.0 |
| 306427 | 2021-05-29 | Zhytomyr Oblast | Ukraine | 87550.0 | 1738.0 | 83790.0 |
| 306428 | 2021-05-29 | Zuid-Holland | Netherlands | 391559.0 | 4252.0 | 0.0 |
306429 rows × 6 columns
# 왜 confirmed, deaths, recovered 가 안 나오지???
df3 = df2.groupby(['Country','Date'])[['Confirmed','Deaths','Recovered']].sum().reset_index() # only nemeric columns
df3.head()
| Country | Date | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|---|
| 0 | Azerbaijan | 2020-02-28 | 1.0 | 0.0 | 0.0 |
| 1 | ('St. Martin',) | 2020-03-10 | 2.0 | 0.0 | 0.0 |
| 2 | Afghanistan | 2020-02-24 | 1.0 | 0.0 | 0.0 |
| 3 | Afghanistan | 2020-02-25 | 1.0 | 0.0 | 0.0 |
| 4 | Afghanistan | 2020-02-26 | 1.0 | 0.0 | 0.0 |
type(df2['Confirmed'])
pandas.core.series.Series
countries = df3['Country'].unique()
len(countries)
229
for idx in range(0,len(countries)):
C = df3[df3['Country']==regions[idx]].reset_index()
plt.scatter(np.arange(0,len(C)),C['Confirmed'],color ='b', label='Confirmed')
plt.scatter(np.arange(0,len(C)),C['Recovered'],color ='g', label='Recovered')
plt.scatter(np.arange(0,len(C)),C['Deaths'],color ='r', label='Deaths')
plt.title(countries[idx])
plt.xlabel('Days since the first suspect')
plt.ylabel('Number of cases')
plt.legend() #범례
plt.show() #그래프 보여줘
df4 = df3.groupby(['Date'])[['Confirmed','Deaths','Recovered']].sum().reset_index()
df4.head
<bound method NDFrame.head of Date Confirmed Deaths Recovered 0 2020-01-22 557.0 17.0 30.0 1 2020-01-23 1097.0 34.0 60.0 2 2020-01-24 941.0 26.0 39.0 3 2020-01-25 1437.0 42.0 42.0 4 2020-01-26 2118.0 56.0 56.0 .. ... ... ... ... 489 2021-05-25 167848207.0 3485788.0 104824935.0 490 2021-05-26 168416423.0 3498544.0 105380748.0 491 2021-05-27 168970791.0 3511297.0 106078106.0 492 2021-05-28 169470725.0 3523117.0 106633069.0 493 2021-05-29 169951560.0 3533619.0 107140669.0 [494 rows x 4 columns]>
C = df4
plt.scatter(np.arange(0,len(C)),C['Confirmed'],color='b',label='Confirmed')
plt.scatter(np.arange(0,len(C)),C['Recovered'],color='g',label='Recovered')
plt.scatter(np.arange(0,len(C)),C['Deaths'],color='r',label='Deaths')
plt.title('world')
plt.xlabel('Days since the first suspect')
plt.ylabel('Number of cases')
plt.legend()
plt.show()